Pull in mouse cell metadata and retain eye cells.
Output the barcodes.
library(tidyverse)
sample_meta <- data.table::fread('~/git/scEiaD_quant/sample_meta.scEiaD_v1.2024_02_28.01.tsv.gz')
cell_meta <- data.table::fread('~/data/scEiaD_modeling/mm111.adata.solo.20240827.obs.csv.gz')[,-1] %>%
relocate(barcode) %>%
filter(solo_doublet == "FALSE")
sceiad_meta <- fst::read_fst('~/data/scEiaD_2022_02/meta_filter.fst')
cell_meta <- cell_meta %>%
left_join(sceiad_meta %>% select(barcode = Barcode, CellType_predict), by = 'barcode')
mm111_eye <- cell_meta %>%
filter(
organ == 'Eye',
organism == 'Mus musculus',
!grepl("^#", sample_accession),
source == 'Tissue')
#mm111_eye$barcode %>% write(gzfile('~/git/scEiaD_modeling/data/mm111_eye_bcs.20250115.csv.gz'))
Use existing models to predict cell types:
# rsync the csv to biowulf2 (not shown)
cd /data/OGVFB_BG/scEiaD/2024_02_28/snakeout/mm111_mature_eye_full
sinteractive --mem=128G --time=8:00:00 --gres=gpu:a100:1
source /data/$USER/conda/etc/profile.d/conda.sh && source /data/$USER/conda/etc/profile.d/mamba.sh
mamba activate rapids_singlecell
bash ct_projection_call.sh
cd /Users/mcgaugheyd/data/scEiaD_modeling/mm111_mature_eye_full
rsync -Prav h2:/data/OGVFB_BG/scEiaD/2024_02_28/snakeout/mm111_mature_eye_full/*CT_projections_20120114* .
ctp_mm111__mrca <- data.table::fread('/Users/mcgaugheyd/data/scEiaD_modeling/mm111_mature_eye_full/mm111.eye.mouse_CT_projections_20120115.csv.gz') %>% select(-17) %>% left_join(sceiad_meta %>% select(barcode = Barcode, CellType_predict), by = 'barcode') %>%
# only look at >= 10 day cells
mutate(age = as.numeric(age)) %>%
filter(age >= 10 | is.na(age))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `age = as.numeric(age)`.
Caused by warning:
! NAs introduced by coercion
label <- 'MajorCellType'
machine_label <- 'CT__chen_mrca'
most_common_mislabel <- ctp_mm111__mrca %>%
group_by(.data[[label]],.data[[machine_label]]) %>%
summarise(Count = n()) %>%
arrange(.data[[label]], -Count) %>%
slice_max(order_by = Count, n = 2) %>%
filter(.data[[label]] != .data[[machine_label]])
`summarise()` has grouped output by 'MajorCellType'. You can override using the `.groups` argument.
ctp_mm111__mrca %>%
mutate(tf = case_when(.data[[label]] == .data[[machine_label]] ~ TRUE,
TRUE ~ FALSE)) %>%
group_by(.data[[label]]) %>%
summarise(accuracy = sum(tf)/length(tf)) %>%
arrange(.data[[label]]) %>%
left_join(most_common_mislabel %>%
select(any_of(label),
`most common mislabel` = any_of(machine_label),
`mislabel count` = Count))
Joining with `by = join_by(MajorCellType)`
umap1 = 'umap1_chen_mrca'
umap2 = 'umap2_chen_mrca'
ct = 'CT__chen_mrca'
color = 'CT__chen_mrca'
score <- 'CT__chen_mrca__max_score'
umap_ct_plotter <- function(obj, umap1, umap2, ct, color, labels = TRUE){
plot <- obj %>%
mutate(leiden = as.factor(leiden)) %>%
ggplot(aes(x=.data[[umap1]],y=.data[[umap2]])) +
scattermore::geom_scattermore(aes(color = .data[[color]]), pointsize = 0.8, alpha = 0.5) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey(), pals::kelly(), pals::alphabet()) %>%
unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none")
if (labels){
plot <- plot + ggrepel::geom_label_repel(data = . %>%
group_by(.data[[ct]]) %>%
summarise(across(all_of(c(umap1,umap2)), median)),
aes(label = .data[[ct]], color = .data[[color]]))
}
print(plot)
}
umap_score_plotter <- function(obj, umap1, umap2, ct, score){
obj %>%
ggplot(aes(x=.data[[umap1]],y=.data[[umap2]])) +
scattermore::geom_scattermore(aes(color = .data[[score]]), pointsize = 0.8, alpha = 0.5) +
ggrepel::geom_label_repel(data = . %>%
group_by(.data[[ct]]) %>%
summarise(across(all_of(c(umap1,umap2)), median)),
aes(label = .data[[ct]])) +
scale_color_viridis_c() +
cowplot::theme_cowplot() + theme(legend.position = "none")
}
umap_ct_plotter(ctp_mm111__mrca, umap1, umap2, ct, color)
umap_ct_plotter(ctp_mm111__mrca, umap1, umap2, 'leiden', 'leiden')
umap_score_plotter(ctp_mm111__mrca, umap1, umap2, ct, score)
umap1 = 'umap1_chen_mrca'
umap2 = 'umap2_chen_mrca'
ct = 'CellType_predict'
color = 'CellType_predict'
umap_ct_plotter(ctp_mm111__mrca, umap1, umap2, ct, color)
ct <- 'CellType_predict'
bar_plotter <- function(data = ctp_mm111, a = 'leiden', b = ct){
data %>%
mutate(leiden = as.factor(leiden)) %>%
group_by(.data[[a]], .data[[b]]) %>%
summarise(Count = n()) %>%
mutate(Ratio = Count / sum(Count),
across(all_of(b), stringr::str_wrap,width = 15)) %>%
filter(Ratio > 0.05) %>%
ggplot(aes(x=Ratio,y=.data[[a]], fill =
.data[[b]], label =
.data[[b]])) +
geom_bar(stat='identity') +
#shadowtext::geom_shadowtext(position = position_stack(vjust = 0.5), color = 'black', bg.colour='white') +
ggrepel::geom_text_repel(position = position_stack(vjust = 0.5), bg.color = 'white', direction = 'x') +
scale_fill_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() +
#scale_x_continuous(limits = c(0,1.1)) +
coord_cartesian(clip = "off")
}
bar_plotter(ctp_mm111__mrca, b = ct)
`summarise()` has grouped output by 'leiden'. You can override using the `.groups` argument.
ct <- 'CT__chen_mrca'
bar_plotter(ctp_mm111__mrca, b = ct)
`summarise()` has grouped output by 'leiden'. You can override using the `.groups` argument.
ctp_mm111_sceiad <- data.table::fread('/Users/mcgaugheyd/data/scEiaD_modeling/mm111_mature_eye_full/mm111.eye.human_CT_projections_20120115.csv.gz') %>% select(-17) %>% left_join(sceiad_meta %>% select(barcode = Barcode, CellType_predict), by = 'barcode')%>%
# only look at >= 10 day cells
mutate(age = as.numeric(age)) %>%
filter(age >= 10 | is.na(age))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `age = as.numeric(age)`.
Caused by warning:
! NAs introduced by coercion
label <- 'CellType_predict'
machine_label <- 'CT__sceiad_20250107_full_mmGeneFilter'
most_common_mislabel <- ctp_mm111_sceiad %>%
group_by(.data[[label]],.data[[machine_label]]) %>%
summarise(Count = n()) %>%
arrange(.data[[label]], -Count) %>%
slice_max(order_by = Count, n = 2) %>%
filter(.data[[label]] != .data[[machine_label]])
`summarise()` has grouped output by 'CellType_predict'. You can override using the `.groups` argument.
ctp_mm111_sceiad %>%
mutate(tf = case_when(.data[[label]] == .data[[machine_label]] ~ TRUE,
TRUE ~ FALSE)) %>%
group_by(.data[[label]]) %>%
summarise(accuracy = sum(tf)/length(tf)) %>%
arrange(.data[[label]]) %>%
left_join(most_common_mislabel %>%
select(any_of(label),
`most common mislabel` = any_of(machine_label),
`mislabel count` = Count))
Joining with `by = join_by(CellType_predict)`
umap1 = 'umap1_sceiad_20250107_full_mmGeneFilter'
umap2 = 'umap2_sceiad_20250107_full_mmGeneFilter'
ct = 'CT__sceiad_20250107_full_mmGeneFilter'
color = 'CT__sceiad_20250107_full_mmGeneFilter'
score <- 'CT__sceiad_20250107_full_mmGeneFilter__max_score'
umap_ct_plotter(ctp_mm111_sceiad, umap1, umap2, ct, color)
umap_ct_plotter(ctp_mm111_sceiad, umap1, umap2, 'leiden', 'leiden')
umap_score_plotter(ctp_mm111_sceiad, umap1, umap2, ct, score)
umap1 = 'umap1_sceiad_20250107_full_mmGeneFilter'
umap2 = 'umap2_sceiad_20250107_full_mmGeneFilter'
ct = 'CellType_predict'
color = 'CellType_predict'
umap_ct_plotter(ctp_mm111_sceiad, umap1, umap2, ct, color)
ct <- 'CellType_predict'
bar_plotter <- function(data = ctp_mm111, a = 'leiden', b = ct){
data %>%
mutate(leiden = as.factor(leiden)) %>%
group_by(.data[[a]], .data[[b]]) %>%
summarise(Count = n()) %>%
mutate(Ratio = Count / sum(Count),
across(all_of(b), stringr::str_wrap,width = 15)) %>%
filter(Ratio > 0.05) %>%
ggplot(aes(x=Ratio,y=.data[[a]], fill =
.data[[b]], label =
.data[[b]])) +
geom_bar(stat='identity') +
#shadowtext::geom_shadowtext(position = position_stack(vjust = 0.5), color = 'black', bg.colour='white') +
ggrepel::geom_text_repel(position = position_stack(vjust = 0.5), bg.color = 'white', direction = 'x') +
scale_fill_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() +
scale_x_continuous(limits = c(0,1.1)) +
coord_cartesian(clip = "off")
}
bar_plotter(ctp_mm111_sceiad, 'leiden', ct)
`summarise()` has grouped output by 'leiden'. You can override using the `.groups` argument.
ct <- 'CT__sceiad_20250107_full_mmGeneFilter'
bar_plotter(ctp_mm111_sceiad, 'leiden', ct)
`summarise()` has grouped output by 'leiden'. You can override using the `.groups` argument.
Using: 1. MRCA Predictions 2. scEiaD Human Eye (mmGeneFilter) Predictions 3. MajorCellType (curated author) 4. CellType_predict (scEiaD 2022 publication machine labels)
bind_cols(
ctp_mm111__mrca,
ctp_mm111_sceiad %>% select(contains('geneFilter'))
) %>%
group_by(CT__sceiad_20250107_full_mmGeneFilter, MajorCellType, CellType_predict, CT__chen_mrca) %>%
mutate(CT__sceiad_20250107_full_mmGeneFilter = gsub("\\s\\(.*|\\srod","",CT__sceiad_20250107_full_mmGeneFilter),
CellType_predict = tolower(CellType_predict) %>% gsub("\\scell|\\sglia|\\srod","",.),
CT__chen_mrca = case_when(grepl("amacrine", CT__chen_mrca) ~ "amacrine",
grepl("retinal cone", CT__chen_mrca) ~ "cone",
grepl("retinal rod", CT__chen_mrca) ~ "rod",
grepl("bipolar", CT__chen_mrca) ~ "bipolar",
grepl("horizontal", CT__chen_mrca) ~ "horizontal",
grepl("retinal pigment", CT__chen_mrca) ~ "rpe",
TRUE ~ CT__chen_mrca) %>% tolower() %>% gsub(" cell","",.)) %>%
summarise(Count = n()) %>%
arrange(CT__sceiad_20250107_full_mmGeneFilter, -Count) %>%
filter(Count > 5) %>%
DT::datatable()
`summarise()` has grouped output by 'CT__sceiad_20250107_full_mmGeneFilter', 'MajorCellType', 'CellType_predict'. You can override using the `.groups` argument.
ct_call_counts <- bind_cols(
ctp_mm111__mrca,
ctp_mm111_sceiad %>% select(contains('geneFilter'))
) %>%
select(barcode, CT__sceiad_20250107_full_mmGeneFilter, MajorCellType, CellType_predict, CT__chen_mrca) %>%
mutate(CT__sceiad_20250107_full_mmGeneFilter = gsub("\\s\\(.*|\\srod","",CT__sceiad_20250107_full_mmGeneFilter),
CellType_predict = tolower(CellType_predict) %>% gsub("\\scell|\\sglia|\\srod","",.),
CT__chen_mrca = case_when(grepl("amacrine", CT__chen_mrca) ~ "amacrine",
grepl("retinal cone", CT__chen_mrca) ~ "cone",
grepl("retinal rod", CT__chen_mrca) ~ "rod",
grepl("bipolar", CT__chen_mrca) ~ "bipolar",
grepl("horizontal", CT__chen_mrca) ~ "horizontal",
grepl("retinal pigment", CT__chen_mrca) ~ "rpe",
TRUE ~ CT__chen_mrca) %>% tolower() %>% gsub(" cell","",.)) %>%
pivot_longer(-barcode) %>%
mutate(value = case_when(is.na(value) ~ '',
TRUE ~ value)) %>%
group_by(barcode, value) %>%
summarise(Count = n())
`summarise()` has grouped output by 'barcode'. You can override using the `.groups` argument.
consensus <- ct_call_counts %>%
filter(value != '', Count > 1) %>%
slice_max(order_by = Count, n = 1)
set.seed(20250154)
mm111_full_eye_ref_bcs <- ctp_mm111__mrca %>% ungroup() %>%
left_join(consensus %>% select(barcode, consensus = value, consensus_count = Count)) %>%
mutate(consensus = case_when(is.na(consensus) ~ 'Unknown', TRUE ~ consensus)) %>%
group_by(study_accession, consensus) %>%
sample_n(500, replace = TRUE) %>%
unique()
Joining with `by = join_by(barcode)`
mm111_full_eye_query_bcs <- ctp_mm111__mrca %>% ungroup() %>%
filter(!barcode %in% mm111_full_eye_ref_bcs$barcode)
#mm111_full_eye_ref_bcs$barcode %>% write(gzfile('~/git/scEiaD_modeling/data/mm111_mature_eye_ref_bcs.full.20250115.csv.gz'))
#mm111_full_eye_query_bcs$barcode %>% write(gzfile('~/git/scEiaD_modeling/data/mm111_mature_eye_query_bcs.full.20250115.csv.gz'))
Run scVI ~/git/scEiaD_modeling/Snakemake.wrapper.sh
pipeline with the ref / query barcodes
cd /data/OGVFB_BG/scEiaD/2024_02_28/snakeout/mm111_mature_eye_full
sbatch snakecall.sh
rsync -Prav h2:/data/OGVFB_BG/scEiaD/2024_02_28/snakeout/mm111_mature_eye_full/mm111_mature_eye_20250113_2000hvg_100e_20l.obs.csv.gz /Users/mcgaugheyd/data/scEiaD_modeling/mm111_mature_eye_full/
source('analysis_scripts.R')
mm_obs <- pull_obs('~/data/scEiaD_modeling/mm111_mature_eye_full/mm111_mature_eye_20250114_2000hvg_100e_20l.obs.csv.gz', machine_label = 'MCT_scANVI')
`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.
mm_obs$obs <- mm_obs$obs %>%
dplyr::rename(barcode =barcodei) %>%
left_join(sceiad_meta %>% select(barcode = Barcode, CellType_predict), by = 'barcode')
a <- mm_obs$obs %>%
left_join(consensus %>% select(barcode, consensus = value, consensus_count = Count)) %>%
group_by(consensus) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = consensus), pointsize = 0.8, alpha = 0.5) +
ggrepel::geom_label_repel(data = . %>% group_by(consensus) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = consensus, color = consensus)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("consensus")
Joining with `by = join_by(barcode)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
b <- mm_obs$obs %>%
left_join(mm_obs$labels, by = 'leiden3') %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = MajorCellType), pointsize = 0.8, alpha = 0.5) +
ggrepel::geom_label_repel(data = . %>% group_by(MajorCellType) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = MajorCellType, color = MajorCellType)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("MajorCellType")
cowplot::plot_grid(a,b,nrow = 1)
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_label_repel()`).
c <- mm_obs$obs %>%
left_join(consensus %>% select(barcode, consensus = value, consensus_count = Count)) %>%
group_by(consensus) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = consensus), pointsize = 4.8, alpha = 0.5) +
facet_wrap(~study_accession) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("color by consensus, split by study")
Joining with `by = join_by(barcode)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
d <- mm_obs$obs %>%
left_join(consensus %>% select(barcode, consensus = value, consensus_count = Count)) %>%
group_by(consensus) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = study_accession),pointsize = 4.8, alpha = 1) +
facet_wrap(~consensus) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() +
ggtitle("color by study, split by consensus")
Joining with `by = join_by(barcode)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
cowplot::plot_grid(c,d,nrow=1)
Goal: remove wacky looking clusters
library(ggtree)
pb <- data.table::fread('~/data/scEiaD_modeling/mm111_mature_eye_full/mm111_mature_eye_20250114_2000hvg_100e_20l.pseudoBulk.leiden3.csv.gz')
colnames(pb) <- gsub("\\.\\d+","",colnames(pb))
hvg <- data.table::fread('~/data/scEiaD_modeling/mm111_mature_eye_full/hvg.2000.csv.gz')[-1,]
rnames <- pb$V1
clust <- str_extract(rnames, '\\d+') %>% as.integer()
pb <- pb[,-1] %>% as.matrix()
row.names(pb) <- as.character(clust)
pb_norm <- metamoRph::normalize_data(t(pb), sample_scale = 'cpm') %>% t()
Sample CPM scaling
log1p scaling
pb_norm <- pb_norm[,hvg$V2]
#pb_norm <- pb_norm[,hvg$V2[!hvg$V2 %in% c(cc_genes,ribo_genes)]]
# https://stats.stackexchange.com/questions/31565/compute-a-cosine-dissimilarity-matrix-in-r
sim <- pb_norm / sqrt(rowSums(pb_norm * pb_norm))
sim <- sim %*% t(sim)
D_sim <- as.dist(1 - sim)
hclust_sim <- hclust(D_sim, method = 'average')
pb_labels <- mm_obs$labels %>% mutate(leiden3=as.character(leiden3))
# machine calls
p <- ggtree(hclust_sim)
p$data <- p$data %>% left_join(pb_labels, by = c("label" = "leiden3"))
f <- p + layout_dendrogram() +
geom_tiplab(aes(label = paste(label, mMCT, studyCount, TotalCount, sep = ' - '), color = mCT)) +
theme_dendrogram(plot.margin=margin(16,16,300,16)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
guides(color="none")
# consensus calls
pb_labels <- mm_obs$obs %>%
left_join(consensus %>% select(barcode, consensus = value, consensus_count = Count), by = 'barcode') %>%
group_by(leiden3, consensus) %>%
summarise(Count = n()) %>%
mutate(Ratio = Count / sum(Count)) %>%
filter(Ratio > 0.1) %>%
arrange(-Ratio) %>%
mutate(cperc = paste0(consensus, " (", round(Ratio,2), ")")) %>%
summarise(CTs = paste0(cperc, collapse = ', ')) %>%
mutate(leiden3 = as.character(leiden3),
CTm = gsub(" \\(.*","",CTs))
Warning: Detected an unexpected many-to-many relationship between `x` and `y`.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.
p <- ggtree(hclust_sim)
p$data <- p$data %>% left_join(pb_labels, by = c("label" = "leiden3"))
g <- p + layout_dendrogram() +
geom_tiplab(aes(label = paste(label,CTs, sep = ' - '), color = CTm)) +
theme_dendrogram(plot.margin=margin(16,16,300,16)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
guides(color="none")
h <- mm_obs$obs %>% mutate(leiden3= as.factor(leiden3)) %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = leiden3), pointsize = 0.8, alpha = 0.5) +
ggrepel::geom_text_repel(data = . %>% group_by(leiden3) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = leiden3, color = leiden3), bg.color = "white") +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey(), pals::kelly(), pals::alphabet(), pals::brewer.set1(n=10)) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("consensus")
cowplot::plot_grid(cowplot::plot_grid(f,g, nrow =2),
cowplot::plot_grid(a,h, nrow = 1),
nrow = 2, rel_heights = c(1,0.4))
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_label_repel()`).
remove_leiden3 <- c(105,
97,
101,
71,
16,
6,
87,
99,
73, 39, 60, 82)
mm_obs$obs %>%
left_join(consensus %>% select(barcode, consensus = value, consensus_count = Count)) %>%
group_by(consensus) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
mutate(remove = case_when(leiden3 %in% remove_leiden3 ~ 'remove',
TRUE ~ 'retain')) %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = consensus), pointsize = 3.8, alpha = 0.5) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("consensus") +
facet_wrap(~remove)
Joining with `by = join_by(barcode)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
Easy is any cluster with >90% consensus
Medium is any cluster that I can easily hand verify as >90% (often you get combos that are effectively the same).
easy_calls <- mm_obs$obs %>%
filter(!leiden3 %in% remove_leiden3) %>%
left_join(consensus %>% select(barcode, consensus = value, consensus_count = Count), by = 'barcode') %>%
group_by(leiden3, consensus) %>%
summarise(Count = n()) %>%
mutate(Ratio = Count / sum(Count)) %>%
filter(Ratio > 0.9, !is.na(consensus))
Warning: Detected an unexpected many-to-many relationship between `x` and `y`.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.
remainder_01 <- mm_obs$obs %>% filter(!leiden3 %in% easy_calls$leiden3) %>%
filter(!leiden3 %in% remove_leiden3) %>%
left_join(ctp_mm111__mrca %>% select(barcode,CT__chen_mrca), by = 'barcode') %>%
left_join(ctp_mm111_sceiad %>% select(barcode,CT__sceiad_20250107_full_mmGeneFilter, CT__sanes_complete_ocular_atlas_SCP2310), by = 'barcode')
remainder_01 %>%
group_by(leiden3, MajorCellType, CT__sceiad_20250107_full_mmGeneFilter, CT__chen_mrca) %>%
summarise(Count =n()) %>%
left_join(remainder_01 %>% group_by(leiden3) %>% summarise(lCount = n()), by ='leiden3') %>%
mutate(Ratio = Count/lCount) %>% filter(Ratio > 0.05) %>%
DT::datatable()
`summarise()` has grouped output by 'leiden3', 'MajorCellType', 'CT__sceiad_20250107_full_mmGeneFilter'. You can override using the `.groups` argument.
cts <- list()
cts$cone <- c(18, 28, 94)
cts$amacrine <- c(14, 23, 38)
cts$`bipolar (rod)` <- c(5)
cts$mueller <- c(45)
cts$microglia <- c(74, 82, 83)
cts$monocyte <- c(88)
cts$`retinal ganglion` <- c(7, 70)
cts$pericyte <- c(34)
cts$astrocyte <- c(81)
Mostly rarer front eye. Adding in the Sanes “Complete Ocular Atlas” predictions
remainder_02 <- remainder_01 %>% filter(!leiden3 %in% (cts %>% unlist()))
remainder_02 %>%
group_by(leiden, leiden2) %>%
summarise(Count = n()) %>%
mutate(Ratio = Count/sum(Count)) %>%
filter(Ratio > 0.1)
`summarise()` has grouped output by 'leiden'. You can override using the `.groups` argument.
remainder_02 %>%
group_by(leiden3, MajorCellType, CT__sanes_complete_ocular_atlas_SCP2310, CT__sceiad_20250107_full_mmGeneFilter, CT__chen_mrca) %>%
summarise(Count =n()) %>%
left_join(remainder_02 %>% group_by(leiden3) %>% summarise(lCount = n()), by ='leiden3') %>%
mutate(Ratio = Count/lCount) %>% filter(Ratio > 0.05) %>%
DT::datatable()
`summarise()` has grouped output by 'leiden3', 'MajorCellType', 'CT__sanes_complete_ocular_atlas_SCP2310', 'CT__sceiad_20250107_full_mmGeneFilter'. You can override using the `.groups` argument.
cts$epithelial <- c(10,29, 77, 91)
cts$fibroblast <- c(31, 92)
cts$`ciliary body` <- c(89)
cts$`t/nk` <- c(96)
cts$mast <- c(104)
tuned_calls <- bind_rows(easy_calls %>% dplyr::rename(CT=consensus), cts %>% enframe(name = 'CT', value = 'leiden3') %>% unnest())
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(leiden3)`.
mm_obs$obs %>%
filter(!leiden3 %in% remove_leiden3) %>%
left_join(tuned_calls, by = 'leiden3') %>%
group_by(CT) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = CT), pointsize = 1.2, alpha = 0.5) +
ggrepel::geom_label_repel(data = . %>% group_by(CT) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = CT, color = CT)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Tuned CT")
diff_mm <- data.table::fread("/Users/mcgaugheyd/git/scEiaD_modeling/data/mm111_mature_eye_20250114_2000hvg_100e_20l.difftesting.leiden3.csv.gz")
conv_table <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db,
keys=gsub('\\.\\d+','',unique(diff_mm$names)),
columns=c("ENSEMBL","SYMBOL", "GENENAME", "ENTREZID"), keytype="ENSEMBL")
'select()' returned 1:many mapping between keys and columns
tib <- diff_mm %>%
left_join(tuned_calls, by = c("base" = 'leiden3')) %>%
filter(CT %in% c("cone","rod")) %>%
mutate(ENSEMBL = gsub("\\.\\d+","",names)) %>%
left_join(conv_table) %>%
filter(SYMBOL %in% str_to_title(c('ARR3','OPN1LW','OPN1SW','RHO', 'OPN1MW', 'RCVRN',"CRX","PROM1"))) %>%
select(SYMBOL, base, logfoldchanges) %>%
pivot_wider(values_from = logfoldchanges, names_from = base)
Joining with `by = join_by(ENSEMBL)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
mat <- tib %>% select(-1) %>% as.matrix()
row.names(mat) <- tib %>% pull(1)
col_fun = circlize::colorRamp2(c(-5, 0, 5), c("blue", "white", "red"))
ComplexHeatmap::Heatmap(mat, col=col_fun)
hrct <- list()
hrct$`cone (s)` <- c(18)
hrct$`cone (ml)` <- c(28,66,30,94)
mm_obs$obs %>%
left_join(hrct %>% enframe(name = 'CT', value = 'leiden3') %>% unnest(), by ='leiden3') %>%
filter(!is.na(CT)) %>%
group_by(CT) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = CT), pointsize = 0.8, alpha = 0.5) +
ggrepel::geom_label_repel(data = . %>% group_by(CT) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = CT, color = CT)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Photoreceptors")
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(leiden3)`.
tib <- diff_mm %>%
left_join(tuned_calls, by = c("base" = 'leiden3')) %>%
filter(grepl("bipolar", CT)) %>%
mutate(ENSEMBL = gsub("\\.\\d+","",names)) %>%
left_join(conv_table) %>%
filter(SYMBOL %in% str_to_title(c('PRKCA','GRM6','GRIK1','NIF3L1',
'LINC00470','DOK5','NELL2','STX18',
'ODF2L','FAM19A4','MEIS2','CALB1', 'FUT4',
'SCG2','LRPPRC','FEZF1' ))) %>%
select(SYMBOL, base, logfoldchanges) %>%
pivot_wider(values_from = logfoldchanges, names_from = base)
Joining with `by = join_by(ENSEMBL)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
mat <- tib %>% select(-1) %>% as.matrix()
row.names(mat) <- tib %>% pull(1)
col_fun = circlize::colorRamp2(c(-5, 0, 5), c("blue", "white", "red"))
ComplexHeatmap::Heatmap(mat, col=col_fun)
hrct$`bipolar (rod)` <- c(5,26)
hrct$`bipolar (off)` <- c(19,32,46,52)
hrct$`bipolar (on)`<- tuned_calls %>% filter(grepl("bipolar",CT)) %>% pull(leiden3)
hrct$`bipolar (on)` <- hrct$`bipolar (on)`[!hrct$`bipolar (on)` %in%
c(hrct$`bipolar (rod)`, hrct$`bipolar (off)`)]
mm_obs$obs %>%
left_join(hrct %>% enframe(name = 'CT', value = 'leiden3') %>% unnest(), by ='leiden3') %>%
filter(grepl("bipolar", CT)) %>%
group_by(CT) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = CT), pointsize = 0.8, alpha = 0.5) +
ggrepel::geom_label_repel(data = . %>% group_by(CT) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = CT, color = CT)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Bipolar")
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(leiden3)`.
tib <- diff_mm %>%
left_join(tuned_calls, by = c("base" = 'leiden3')) %>%
filter(grepl("amacrine", CT)) %>%
mutate(ENSEMBL = gsub("\\.\\d+","",names)) %>%
left_join(conv_table) %>%
filter(SYMBOL %in% str_to_title(c('GAD1','GAD2','SLC6A9','NFIA'))) %>%
select(SYMBOL, base, logfoldchanges) %>%
pivot_wider(values_from = logfoldchanges, names_from = base)
Joining with `by = join_by(ENSEMBL)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
mat <- tib %>% select(-1) %>% as.matrix()
row.names(mat) <- tib %>% pull(1)
col_fun = circlize::colorRamp2(c(-5, 0, 5), c("blue", "white", "red"))
ComplexHeatmap::Heatmap(mat, col=col_fun)
hrct$`amacrine (glycinergic)` <- c(47,80,54,42,12,58,72,48)
hrct$`amacrine (gaba/glyci)` <- c(23)
hrct$`amacrine (gabanergic)`<- tuned_calls %>% filter(grepl("amacrine",CT)) %>% pull(leiden3)
hrct$`amacrine (gabanergic)` <- hrct$`amacrine (gabanergic)`[!hrct$`amacrine (gabanergic)` %in%
c(hrct$`amacrine (glycinergic)`, hrct$`amacrine (gaba/glyci)`)]
mm_obs$obs %>%
left_join(hrct %>% enframe(name = 'CT', value = 'leiden3') %>% unnest(), by ='leiden3') %>%
filter(grepl("amacrine", CT)) %>%
group_by(CT) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = CT), pointsize = 0.8, alpha = 0.5) +
ggrepel::geom_label_repel(data = . %>% group_by(CT) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = CT, color = CT)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Amacrine")
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(leiden3)`.
Didn’t see the H1/H2 distinction with ISL1 / LHX2
All are H1?
tib <- diff_mm %>%
left_join(tuned_calls, by = c("base" = 'leiden3')) %>%
#filter(grepl("horizont", CT)) %>%
mutate(ENSEMBL = gsub("\\.\\d+","",names)) %>%
left_join(conv_table) %>%
filter(SYMBOL %in% str_to_title(c('LHX1','ISL1','ONECUT1','ONECUT2'))) %>%
select(SYMBOL, base, logfoldchanges) %>%
pivot_wider(values_from = logfoldchanges, names_from = base)
Joining with `by = join_by(ENSEMBL)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
mat <- tib %>% select(-1) %>% as.matrix()
row.names(mat) <- tib %>% pull(1)
col_fun = circlize::colorRamp2(c(-5, 0, 5), c("blue", "white", "red"))
ComplexHeatmap::Heatmap(mat, col=col_fun)
Couldn’t make any sense of them using the markers I curated in “Human_Mature_eye_full__stage4_neural.Rmd”
tib <- diff_mm %>%
left_join(tuned_calls, by = c("base" = 'leiden3')) %>%
filter(CT == 'retinal ganglion') %>%
mutate(ENSEMBL = gsub("\\.\\d+","",names)) %>%
left_join(conv_table) %>%
filter(SYMBOL %in% str_to_title(c('TPBG','TBR1','FABP4','CHRNA2', 'LMO2',
'EOMES','SSTR2','FOXP2','FOXP1','PRR35','CARTPT',
'CDKN2A','ARPP21','OPN4', 'NEFM',
'TUBB3'))) %>%
select(SYMBOL, base, logfoldchanges) %>%
pivot_wider(values_from = logfoldchanges, names_from = base)
Joining with `by = join_by(ENSEMBL)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
mat <- tib %>% select(-1) %>% as.matrix()
row.names(mat) <- tib %>% pull(1)
col_fun = circlize::colorRamp2(c(-5, 0, 5), c("blue", "white", "red"))
ComplexHeatmap::Heatmap(mat, col=col_fun)
NA
NA
tib <- diff_mm %>%
left_join(tuned_calls, by = c("base" = 'leiden3')) %>%
filter(grepl("mast|mono|nk|microglia",CT)) %>%
mutate(ENSEMBL = gsub("\\.\\d+","",names)) %>%
left_join(conv_table) %>%
filter(SYMBOL %in% str_to_title(c("CD68",
"TMEM119", "Olfml3", # monocyte
"Emilin2", "IL1RL1", # mast
"CD4", #tcell
"CD163",# macrophaege
"CD14"))) %>%
mutate(base = as.character(base)) %>%
mutate(base = case_when(grepl("mast|mono|nk|microglia", CT) ~ paste0(base, ' - ', CT),
TRUE ~ base)) %>%
select(SYMBOL, base, logfoldchanges) %>%
pivot_wider(values_from = logfoldchanges, names_from = base)
Joining with `by = join_by(ENSEMBL)`Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
mat <- tib %>% select(-1) %>% as.matrix()
row.names(mat) <- tib %>% pull(1)
col_fun = circlize::colorRamp2(c(-5, 0, 5), c("blue", "white", "red"))
ComplexHeatmap::Heatmap(mat, col=col_fun)
hrct$macrophage <- c(74, 83)
hr_tuned_calls <- tuned_calls %>% left_join(
hrct %>%
enframe(name = 'hrCT', value = 'leiden3') %>%
unnest(),
by = 'leiden3'
) %>%
mutate(hrCT = case_when(is.na(hrCT) ~ CT,
TRUE ~ hrCT))
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(leiden3)`.
mm_obs$obs %>%
filter(!leiden3 %in% remove_leiden3) %>%
left_join(hr_tuned_calls, by = 'leiden3') %>%
group_by(CT) %>%
sample_n(5000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = hrCT), pointsize = 1.2, alpha = 0.5) +
ggrepel::geom_label_repel(data = . %>% group_by(CT, hrCT) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = hrCT, color = hrCT)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Tuned CT")
Output tuned calls, re-run scANVI modeling to finalize CT model. Like the human mature eye models, the covariates (ribosome, mito, etc) were removed from the modelling - this, anecdotally, modestly make the models a little bit worse but will substantially make it easier to apply new data as I don’t have to fuss about with trying to also create the same covariate fields.
output <- mm_obs$obs %>%
filter(!leiden3 %in% remove_leiden3) %>%
left_join(hr_tuned_calls, by = 'leiden3') %>%
# dplyr::rename(tunedCT = CT,
# tuned_hrCT = hrCT) %>%
select(-Count, -Ratio) %>%
relocate(barcode, CellType, MajorCellType, `CT`, `hrCT`) %>%
group_by(across(c(-umap1, -umap2))) %>% summarise(umap1 = mean(umap1), umap2 = mean(umap2))
`summarise()` has grouped output by 'barcode', 'CellType', 'MajorCellType', 'CT', 'hrCT', 'background_fraction', 'cell_probability', 'cell_size', 'droplet_efficiency', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'n_genes', 'n_counts', '_scvi_batch', '_scvi_labels', 'solo_doublet', 'solo_score', 'sample_accession', 'library_layout', 'reference', 'kb_tech', 'umi', 'workflow', 'kb_sum', 'organism', 'platform', 'study_accession', 'tissue', 'covariate', 'integration_group', 'tissuenote', 'source', 'bam10x', 'biosample', 'organ', 'sex', 'biosample_title', 'batch', 'age', 'capture_type', 'SubCellType', 'capture_covariate', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_protocadherin', 'pct_counts_protocadherin', 'leiden', 'leiden2', 'leiden3', 'leiden5', 'MCT_scANVI', 'MCT_scANVI_max_score', 'side'. You can override using the `.groups` argument.
#output %>%
# write_csv(file = "~/git/scEiaD_modeling/data/mm111_adult_eye_tuned_CT_calls.20250120.obs.csv.gz")
set.seed(20250120)
mm111_full_eye_ref_bcs2 <- output %>%
group_by(study_accession, hrCT) %>%
sample_n(500, replace = TRUE) %>%
unique()
mm111_full_eye_query_bcs2 <- output %>%
filter(!barcode %in% mm111_full_eye_ref_bcs2$barcode)
#mm111_full_eye_ref_bcs2$barcode %>% write(gzfile('~/git/scEiaD_modeling/data/mm111_mature_eye_ref_bcs.full.20250120.stage4.csv.gz'))
#mm111_full_eye_query_bcs2$barcode %>% write(gzfile('~/git/scEiaD_modeling/data/mm111_mature_eye_query_bcs.full.20250120.stage4.csv.gz'))
cd /data/OGVFB_BG/scEiaD/2024_02_28/snakeout/mm111_mature_eye_full/stage4
source /data/$USER/conda/etc/profile.d/conda.sh && source /data/$USER/conda/etc/profile.d/mamba.sh
mamba activate rapids_singlecell
python ~/git/scEiaD_modeling/workflow/scripts/append_obs.py ../../../mm111.adata.solo.20240827.h5ad /home/mcgaugheyd/git/scEiaD_modeling/data/mm111_adult_eye_tuned_CT_calls.20250120.obs.csv.gz mm111_mature_eye_20250120_full_2000hvg_100e_20l_stage4.h5ad --transfer_columns CT,hrCT
note - adding more epochs (was previously hardcoded to 50) to the scanvi modelling step seems to make a noticeable improvement in accuracy. I had previously set this to 50 because it is such a slow step - especially when running it across the (much larger) human eye dataset.
mm_obs4 <-pull_obs('~/data/scEiaD_modeling/mm111_mature_eye_full/mm111_mature_eye_20250120_stage4_noCov_150epo_2000hvg_50e_20l.obs.csv.gz', machine_label = 'scANVI_hrCT', label = 'hrCT')
`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.
Group by leiden3, cell type and rename cell types in the minority (5% in this case) to the majority cell type call
# retains CT calls >= 0.05 of a cluster. anything below gets changed to the dominant ct
mm_nobs_s4_cleaning <- mm_obs4$obs %>%
group_by(leiden3, scANVI_hrCT) %>%
summarise(Count = n()) %>%
mutate(Ratio = Count / sum(Count)) %>%
mutate(dominant_celltype = scANVI_hrCT[which.max(Count)]) %>%
mutate(CTc = case_when(Ratio < 0.05 ~ dominant_celltype,
TRUE ~ scANVI_hrCT))
`summarise()` has grouped output by 'leiden3'. You can override using the `.groups` argument.
mm_nobs_s4 <- mm_obs4$obs %>%
left_join(mm_nobs_s4_cleaning %>%
select(leiden3, scANVI_hrCT, CTc), by = c("leiden3","scANVI_hrCT"))
# quick compare count diffs
mm_nobs_s4 %>% group_by(hrCT, CTc) %>% mcHelpeRs::sum_rat() %>% data.frame
mm_nobs_s4 %>%
group_by(leiden3) %>%
sample_n(1000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = hrCT), pointsize = 0.8, alpha = 0.9) +
ggrepel::geom_label_repel(data = . %>% group_by(hrCT) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = hrCT, color = hrCT)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Stage 4 Tuned CT")
mm_nobs_s4 %>%
group_by(leiden3) %>%
sample_n(1000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = CTc), pointsize = 0.8, alpha = 0.9) +
ggrepel::geom_label_repel(data = . %>% group_by(CTc) %>%
summarise(umap1 = median(umap1),
umap2 = median(umap2)),
aes(label = CTc, color = CTc)) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Stage 4 Tuned scANVI CT")
mm_obs4$obs %>%
group_by(leiden3) %>%
sample_n(1000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = scANVI_hrCT), pointsize = 2.8, alpha = 0.9) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Stage 4 scANVI CT") +
facet_wrap(~scANVI_hrCT)
mm_nobs_s4 %>%
group_by(leiden3) %>%
sample_n(1000, replace = TRUE) %>%
unique() %>%
ggplot(aes(x=umap1,y=umap2)) +
scattermore::geom_scattermore(aes(color = CTc), pointsize = 2.8, alpha = 0.9) +
scale_color_manual(values = c(pals::alphabet2(), pals::glasbey()) %>% unname()) +
cowplot::theme_cowplot() + theme(legend.position = "none") +
ggtitle("Stage 4 scANVI CT") +
facet_wrap(~CTc)
From Author Label (after my manual nomenclature normalization)
machine_label = 'scANVI_hrCT'; label = 'MajorCellType'
mm_nobs_s4 %>%
group_by(.data[[label]],.data[[machine_label]]) %>%
summarise(Count = n()) %>%
mutate(Ratio = Count/sum(Count)) %>%
ggplot(aes(x=.data[[label]],y=.data[[machine_label]],fill=Ratio, label = round(Ratio, 2))) +
geom_tile() +
shadowtext::geom_shadowtext() +
cowplot::theme_cowplot() +
scale_fill_viridis_c(begin = 0, end = 1) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
`summarise()` has grouped output by 'MajorCellType'. You can override using the `.groups` argument.
machine_label = 'CTc'; label = 'hrCT'
mm_nobs_s4 %>%
group_by(.data[[label]],.data[[machine_label]]) %>%
summarise(Count = n()) %>%
mutate(Ratio = Count/sum(Count)) %>%
ggplot(aes(x=.data[[label]],y=.data[[machine_label]],fill=Ratio, label = round(Ratio, 2))) +
geom_tile() +
shadowtext::geom_shadowtext() +
cowplot::theme_cowplot() +
scale_fill_viridis_c(begin = 0, end = 1) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
`summarise()` has grouped output by 'hrCT'. You can override using the `.groups` argument.
mm_nobs_s4 %>% write_csv("~/data/scEiaD_modeling/mm111_mature_eye_full/mm111_mature_eye_20250120_stage4_noCov_150epo_2000hvg_50e_20l.tunedStage4.v01.obs.csv.gz")
sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.7.2
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggtree_3.12.0 fstcore_0.9.18 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[7] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 shape_1.4.6.1 rstudioapi_0.16.0
[4] jsonlite_1.8.8 magrittr_2.0.3 magick_2.8.5
[7] farver_2.1.2 rmarkdown_2.27 GlobalOptions_0.1.2
[10] fs_1.6.4 zlibbioc_1.50.0 vctrs_0.6.5
[13] Cairo_1.6-2 memoise_2.0.1 DelayedMatrixStats_1.26.0
[16] htmltools_0.5.8.1 S4Arrays_1.4.1 BiocNeighbors_1.22.0
[19] SparseArray_1.4.8 gridGraphics_0.5-1 sass_0.4.9
[22] bslib_0.8.0 htmlwidgets_1.6.4 cachem_1.1.0
[25] igraph_2.0.3 iterators_1.0.14 lifecycle_1.0.4
[28] pkgconfig_2.0.3 rsvd_1.0.5 Matrix_1.7-0
[31] R6_2.5.1 fastmap_1.2.0 clue_0.3-65
[34] GenomeInfoDbData_1.2.12 MatrixGenerics_1.16.0 digest_0.6.36
[37] aplot_0.2.3 colorspace_2.1-1 patchwork_1.2.0
[40] AnnotationDbi_1.66.0 S4Vectors_0.42.1 dqrng_0.4.1
[43] irlba_2.3.5.1 crosstalk_1.2.1 GenomicRanges_1.56.1
[46] RSQLite_2.3.7 beachmat_2.20.0 labeling_0.4.3
[49] org.Mm.eg.db_3.19.1 fansi_1.0.6 timechange_0.3.0
[52] httr_1.4.7 abind_1.4-5 compiler_4.4.1
[55] doParallel_1.0.17 bit64_4.0.5 withr_3.0.0
[58] BiocParallel_1.38.0 DBI_1.2.3 R.utils_2.12.3
[61] maps_3.4.2 DelayedArray_0.30.1 rjson_0.2.21
[64] bluster_1.14.0 tools_4.4.1 ape_5.8
[67] fst_0.9.8 R.oo_1.26.0 glue_1.7.0
[70] nlme_3.1-165 shadowtext_0.1.4 grid_4.4.1
[73] cluster_2.1.6 generics_0.1.3 gtable_0.3.5
[76] tzdb_0.4.0 R.methodsS3_1.8.2 data.table_1.16.99
[79] hms_1.1.3 BiocSingular_1.20.0 ScaledMatrix_1.12.0
[82] metapod_1.12.0 utf8_1.2.4 XVector_0.44.0
[85] BiocGenerics_0.50.0 foreach_1.5.2 ggrepel_0.9.5
[88] pillar_1.9.0 vroom_1.6.5 yulab.utils_0.1.5
[91] limma_3.60.4 pals_1.9 circlize_0.4.16
[94] treeio_1.28.0 lattice_0.22-6 bit_4.0.5
[97] tidyselect_1.2.1 ComplexHeatmap_2.20.0 SingleCellExperiment_1.26.0
[100] locfit_1.5-9.10 Biostrings_2.72.1 scuttle_1.14.0
[103] knitr_1.48 IRanges_2.38.1 edgeR_4.2.1
[106] SummarizedExperiment_1.34.0 scattermore_1.2 stats4_4.4.1
[109] xfun_0.48 Biobase_2.64.0 statmod_1.5.0
[112] matrixStats_1.3.0 DT_0.33 stringi_1.8.4
[115] UCSC.utils_1.0.0 lazyeval_0.2.2 ggfun_0.1.5
[118] yaml_2.3.10 evaluate_0.24.0 codetools_0.2-20
[121] ggplotify_0.1.2 cli_3.6.3 munsell_0.5.1
[124] jquerylib_0.1.4 dichromat_2.0-0.1 Rcpp_1.0.13
[127] GenomeInfoDb_1.40.1 mapproj_1.2.11 png_0.1-8
[130] parallel_4.4.1 blob_1.2.4 scran_1.32.0
[133] sparseMatrixStats_1.16.0 viridisLite_0.4.2 tidytree_0.4.6
[136] metamoRph_0.2.1 scales_1.3.0 crayon_1.5.3
[139] GetoptLong_1.0.5 rlang_1.1.4 KEGGREST_1.44.1
[142] cowplot_1.1.3